
import jieba.posseg as pseg
from dataset import SENTENCES

def demo(sentence: "list[str] | None" = None):
    """Segment each sentence with jieba and split the result into words and tags.

    Args:
        sentence: Iterable of sentences to tag. When omitted (or ``None``),
            the single sample sentence ``"词性标注"`` is used.

    Returns:
        A ``(ret_seg, ret_tag)`` tuple of two lists aligned with the input:
        ``ret_seg[i]`` holds the words of the i-th sentence and
        ``ret_tag[i]`` holds the tag paired with each of those words.
    """
    # Build the default per call instead of storing a mutable list as the
    # default argument, which would be one object shared by every call.
    if sentence is None:
        sentence = ["词性标注"]

    ret_seg = []
    ret_tag = []
    for s in sentence:
        # Tag once per sentence; each item unpacks into a (word, tag) pair.
        pairs = pseg.lcut(s)
        ret_seg.append([word for word, _ in pairs])
        ret_tag.append([tag for _, tag in pairs])
    return ret_seg, ret_tag


'''
Some notes
- paddle mode
jieba only ever tags the words in an English sentence as `eng`

'''

if __name__ == "__main__":
    # Tag every sample sentence, then print each one followed by its
    # slash-separated words and slash-separated tags, with a blank line
    # between sentences.
    all_words, all_tags = demo(SENTENCES)
    for idx, (words, tags) in enumerate(zip(all_words, all_tags)):
        print(SENTENCES[idx])
        print("/".join(words))
        print("/".join(tags))
        print("")

